
from lxml import etree

# Parse the local file ../files/test.html (the relative path is resolved
# against the current working directory).
#
# etree.parse() falls back to lxml's strict XML parser when no parser is
# given, and that parser raises XMLSyntaxError on ordinary HTML (void tags
# such as <meta>/<br>/<img>, valueless attributes, ...). An explicit
# HTMLParser tolerates such markup. encoding='utf-8' keeps the same decoding
# the XML parser would apply to a file without an XML declaration.
location_tree = etree.parse('../files/test.html', etree.HTMLParser(encoding='utf-8'))

# print(location_tree )

# Path-based lookups

# Text of every <a> that sits under an <li> inside a <ul> (any depth).
ul_link_text_xpath = "//ul//li//a/text()"
search_ul = location_tree.xpath(ul_link_text_xpath)
print(len(search_ul), search_ul)

# Same path, limited to <a> elements that carry an id attribute.
link_with_id_text_xpath = "//ul//li//a[@id]/text()"
search_id = location_tree.xpath(link_with_id_text_xpath)
print(len(search_id), search_id)

# Exact-match lookups: the id attribute must equal the given value.

# <a id="apple">
apple_text_xpath = "//ul//li//a[@id='apple']/text()"
pre_search_apple = location_tree.xpath(apple_text_xpath)
print(len(pre_search_apple), pre_search_apple)

# <a id="durian">
durian_text_xpath = "//ul//li//a[@id='durian']/text()"
pre_search_durian = location_tree.xpath(durian_text_xpath)
print(len(pre_search_durian), pre_search_durian)


# Attribute lookup: read the class attribute of the <div> whose id is "banner".
banner_class_xpath = "//div[@id='banner']/@class"
search_banner = location_tree.xpath(banner_class_xpath)
print(len(search_banner), search_banner)


# Fuzzy lookup: <div> elements whose id contains the substring "shop".
shop_div_text_xpath = "//div[contains(@id,'shop')]/text()"
search_id_shop = location_tree.xpath(shop_div_text_xpath)
print(len(search_id_shop), search_id_shop)

# Fuzzy lookup by prefix, applied to <li> elements under an <ol>.
# NOTE: the original comments describe the prefixes as "img-" and "page-";
# the expressions below match the shorter prefixes "img" and "page".

# <li> whose id starts with "img"
img_item_text_xpath = "//ol//li[starts-with(@id,'img')]/text()"
search_id_img = location_tree.xpath(img_item_text_xpath)
print(len(search_id_img), search_id_img)

# <li> whose id starts with "page"
page_item_text_xpath = "//ol//li[starts-with(@id,'page')]/text()"
search_id_page = location_tree.xpath(page_item_text_xpath)
print(len(search_id_page), search_id_page)

# Logical operators

# "and": <div> whose class equals "shop" AND whose id equals "shop2".
shop_and_xpath = "//div[@class='shop' and @id='shop2']/text()"
search_shop_name = location_tree.xpath(shop_and_xpath)
print(len(search_shop_name), search_shop_name)

# "|" (union): text of the <div> with id "shop1" together with the text of
# the <div> with id "shop2".
shop_union_xpath = "//div[@id='shop1']/text() | //div[@id='shop2']/text()"
search_shop_id_name = location_tree.xpath(shop_union_xpath)
print(len(search_shop_id_name), search_shop_id_name)


